-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Serialize object fix #11349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Serialize object fix #11349
Conversation
The removed if in var.c caused serialize to not handle object references correctly under certain circumstances. See tests/serialize/serialization_objects_019.phpt The bug was originally introduced in commit 6c5942f, and the problematic line was last modified in commit bb0b4eb. (Fixes oss-fuzz #44954) The testcase from bb0b4eb still passes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to play with the patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The memory error should be fixed first.
I propose the following patch on top of your PR diff --git a/ext/standard/var.c b/ext/standard/var.c
index 08a9996472..303e77b850 100644
--- a/ext/standard/var.c
+++ b/ext/standard/var.c
@@ -652,13 +652,12 @@ PHP_FUNCTION(var_export)
}
/* }}} */
-static void php_var_serialize_intern(smart_str *buf, zval *struc, php_serialize_data_t var_hash, bool unique, bool subtract_rc);
+static void php_var_serialize_intern(smart_str *buf, zval *struc, php_serialize_data_t var_hash, bool in_rcn_array, bool is_root);
/**
- * @param bool unique Whether the element can only appear once in the serialized data. It can appear
- * multiple times if it is embedded in an array with RC > 1.
+ * @param bool in_rcn_array Whether the element appears in an array with RC > 1.
*/
-static inline zend_long php_add_var_hash(php_serialize_data_t data, zval *var, bool unique) /* {{{ */
+static inline zend_long php_add_var_hash(php_serialize_data_t data, zval *var, bool in_rcn_array) /* {{{ */
{
zval *zv;
zend_ulong key;
@@ -670,7 +669,7 @@ static inline zend_long php_add_var_hash(php_serialize_data_t data, zval *var, b
/* pass */
} else if (Z_TYPE_P(var) != IS_OBJECT) {
return 0;
- } else if (unique
+ } else if (!in_rcn_array
&& Z_REFCOUNT_P(var) == 1
&& (Z_OBJ_P(var)->properties == NULL || GC_REFCOUNT(Z_OBJ_P(var)->properties) == 1)) {
return 0;
@@ -929,7 +928,7 @@ static int php_var_serialize_get_sleep_props(
}
/* }}} */
-static void php_var_serialize_nested_data(smart_str *buf, zval *struc, HashTable *ht, uint32_t count, bool incomplete_class, php_serialize_data_t var_hash, bool unique) /* {{{ */
+static void php_var_serialize_nested_data(smart_str *buf, zval *struc, HashTable *ht, uint32_t count, bool incomplete_class, php_serialize_data_t var_hash, bool in_rcn_array) /* {{{ */
{
smart_str_append_unsigned(buf, count);
smart_str_appendl(buf, ":{", 2);
@@ -959,19 +958,19 @@ static void php_var_serialize_nested_data(smart_str *buf, zval *struc, HashTable
if (Z_TYPE_P(data) == IS_ARRAY) {
if (UNEXPECTED(Z_IS_RECURSIVE_P(data))
|| UNEXPECTED(Z_TYPE_P(struc) == IS_ARRAY && Z_ARR_P(data) == Z_ARR_P(struc))) {
- php_add_var_hash(var_hash, struc, unique);
+ php_add_var_hash(var_hash, struc, in_rcn_array);
smart_str_appendl(buf, "N;", 2);
} else {
if (Z_REFCOUNTED_P(data)) {
Z_PROTECT_RECURSION_P(data);
}
- php_var_serialize_intern(buf, data, var_hash, unique, false);
+ php_var_serialize_intern(buf, data, var_hash, in_rcn_array, false);
if (Z_REFCOUNTED_P(data)) {
Z_UNPROTECT_RECURSION_P(data);
}
}
} else {
- php_var_serialize_intern(buf, data, var_hash, unique, false);
+ php_var_serialize_intern(buf, data, var_hash, in_rcn_array, false);
}
} ZEND_HASH_FOREACH_END();
}
@@ -986,13 +985,13 @@ static void php_var_serialize_class(smart_str *buf, zval *struc, HashTable *ht,
if (php_var_serialize_get_sleep_props(&props, struc, ht) == SUCCESS) {
php_var_serialize_class_name(buf, struc);
php_var_serialize_nested_data(
- buf, struc, &props, zend_hash_num_elements(&props), /* incomplete_class */ 0, var_hash, GC_REFCOUNT(&props) == 1);
+ buf, struc, &props, zend_hash_num_elements(&props), /* incomplete_class */ 0, var_hash, GC_REFCOUNT(&props) > 1);
}
zend_hash_destroy(&props);
}
/* }}} */
-static void php_var_serialize_intern(smart_str *buf, zval *struc, php_serialize_data_t var_hash, bool unique, bool subtract_rc) /* {{{ */
+static void php_var_serialize_intern(smart_str *buf, zval *struc, php_serialize_data_t var_hash, bool in_rcn_array, bool is_root) /* {{{ */
{
zend_long var_already;
HashTable *myht;
@@ -1001,7 +1000,7 @@ static void php_var_serialize_intern(smart_str *buf, zval *struc, php_serialize_
return;
}
- if (var_hash && (var_already = php_add_var_hash(var_hash, struc, unique))) {
+ if (var_hash && (var_already = php_add_var_hash(var_hash, struc, in_rcn_array))) {
if (var_already == -1) {
/* Reference to an object that failed to serialize, replace with null. */
smart_str_appendl(buf, "N;", 2);
@@ -1110,7 +1109,7 @@ again:
if (Z_ISREF_P(data) && Z_REFCOUNT_P(data) == 1) {
data = Z_REFVAL_P(data);
}
- php_var_serialize_intern(buf, data, var_hash, Z_REFCOUNT_P(&retval) == 1, false);
+ php_var_serialize_intern(buf, data, var_hash, Z_REFCOUNT_P(&retval) > 1, false);
} ZEND_HASH_FOREACH_END();
smart_str_appendc(buf, '}');
@@ -1232,7 +1231,7 @@ again:
prop = Z_REFVAL_P(prop);
}
- php_var_serialize_intern(buf, prop, var_hash, true, false);
+ php_var_serialize_intern(buf, prop, var_hash, false, false);
}
smart_str_appendc(buf, '}');
} else {
@@ -1247,7 +1246,7 @@ again:
if (count > 0 && incomplete_class) {
--count;
}
- php_var_serialize_nested_data(buf, struc, myht, count, incomplete_class, var_hash, GC_REFCOUNT(myht) == 1);
+ php_var_serialize_nested_data(buf, struc, myht, count, incomplete_class, var_hash, GC_REFCOUNT(myht) > 1);
zend_release_properties(myht);
return;
}
@@ -1255,7 +1254,8 @@ again:
smart_str_appendl(buf, "a:", 2);
myht = Z_ARRVAL_P(struc);
php_var_serialize_nested_data(
- buf, struc, myht, zend_array_count(myht), /* incomplete_class */ 0, var_hash, unique && (GC_REFCOUNT(myht) - (subtract_rc ? 1 : 0)) == 1);
+ buf, struc, myht, zend_array_count(myht), /* incomplete_class */ 0, var_hash,
+ !is_root && (in_rcn_array || GC_REFCOUNT(myht) > 1));
return;
case IS_REFERENCE:
struc = Z_REFVAL_P(struc);
@@ -1269,17 +1269,11 @@ again:
PHPAPI void php_var_serialize(smart_str *buf, zval *struc, php_serialize_data_t *data) /* {{{ */
{
- php_var_serialize_intern(buf, struc, *data, true, false);
+ php_var_serialize_intern(buf, struc, *data, false, true);
smart_str_0(buf);
}
/* }}} */
-static void php_var_serialize_cv(smart_str *buf, zval *struc, php_serialize_data_t *data)
-{
- php_var_serialize_intern(buf, struc, *data, true, true);
- smart_str_0(buf);
-}
-
PHPAPI php_serialize_data_t php_var_serialize_init(void) {
struct php_serialize_data *d;
/* fprintf(stderr, "SERIALIZE_INIT == lock: %u, level: %u\n", BG(serialize_lock), BG(serialize).level); */
@@ -1320,30 +1314,8 @@ PHP_FUNCTION(serialize)
Z_PARAM_ZVAL(struc)
ZEND_PARSE_PARAMETERS_END();
- bool data_is_cv = false;
- if (Z_TYPE_P(struc) == IS_ARRAY
- && !(GC_FLAGS(Z_ARRVAL_P(struc)) & IS_ARRAY_IMMUTABLE)
- && EG(current_execute_data)
- && EG(current_execute_data)->prev_execute_data) {
- zend_execute_data *execute_data = EG(current_execute_data)->prev_execute_data;
- if (execute_data->func && ZEND_USER_CODE(execute_data->func->type)) {
- zend_op_array *func = &execute_data->func->op_array;
- const zend_op *opline = execute_data->opline;
- if (func->opcodes < opline) {
- const zend_op *prev_opline = opline - 1;
- if ((prev_opline->opcode == ZEND_SEND_VAR || prev_opline->opcode == ZEND_SEND_VAR_EX) && prev_opline->op1_type == IS_CV) {
- data_is_cv = true;
- }
- }
- }
- }
-
PHP_VAR_SERIALIZE_INIT(var_hash);
- if (data_is_cv) {
- php_var_serialize_cv(&buf, struc, &var_hash);
- } else {
- php_var_serialize(&buf, struc, &var_hash);
- }
+ php_var_serialize(&buf, struc, &var_hash);
PHP_VAR_SERIALIZE_DESTROY(var_hash);
if (EG(exception)) { |
@dstogov Much better naming, thank you! |
718dae4
to
e374a03
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The final patch definitely looks better. I think it's OK now.
@nielsdos can you please make a final review
The high-level idea makes sense. However I found a variation of the original code that still doesn't work. I only had a brief look. I didn't have time yet to do more testing or debugging to find the root cause. May be the root cause is different. <?php
function test() {
$root = new stdClass;
$end = new stdClass;
$root->a = [$end];
$end->c = [$root->a, $root->a];
return $root;
}
var_dump(unserialize($s = serialize(test())));
var_dump(test());
var_dump($s); This gives this output:
The first var_dump shows the unserialisation after it was serialised, and the second dump shows what I expected to see. The entries of the c array are NULL. |
@nielsdos Thank you for checking! Your test case seems to fail here: Lines 942 to 945 in a02f7f2
It seems that non-referencial recursive arrays are not supported. Instead, we just replace them with |
Okay this looks good then. |
@nielsdos Thank you! |
I discovered another problematic case. $root = [];
$root[] = new stdClass;
$root[] = &$root;
echo serialize($root), "\n";
The issue here is that we're protecting nested arrays but not the root array. This can be solved by moving the array protection/recursion check from Lines 954 to 966 in 4fcb3e0
Unfortunately, there's another issue. I noticed that there are existing tests that look off.
Specifically, all these tests contain a simple, self-recursive array like With the proposed solution, we map flag the root array as visited right away. The second encounter fails because the array is already flagged as visited, and thus replaced with Tbh, I consider recursive arrays mostly useless and would prefer if PHP didn't support them at all. I'm happy completely ignoring this use case. |
Adapted from #11305.
The tracking of RC1 arrays was straight-forward. The most common case is likely passing a flat array of objects to serialize. Unfortunately, this will still result in an array with RC2 if the array is stored in a CV, because
ZEND_SEND_VAR[_EX]
increases the refcount. I'm trying to look for the correspondingZEND_SEND_VAR
opcode in the parent call frame. At least in terms of the synthetic benchmark the change seems worth it.Benchmark
Patch